1
|
|
|
'use strict'; |
2
|
|
|
|
3
|
|
|
const Wappalyzer = require('./wappalyzer'); |
4
|
|
|
const url = require('url'); |
5
|
|
|
const fs = require('fs'); |
6
|
|
|
const path = require('path'); |
7
|
|
|
const Browser = require('zombie'); |
8
|
|
|
|
9
|
|
|
// Fingerprint database bundled next to this module; its `apps` and
// `categories` members are handed to Wappalyzer by the Driver constructor.
const appsFile = path.resolve(__dirname + '/apps.json');
const json = JSON.parse(fs.readFileSync(appsFile));
|
|
|
|
10
|
|
|
|
11
|
|
|
// Decides which link pathnames are worth crawling: either the path contains
// no dot at all (e.g. "/about"), or it ends in a known page extension.
// Only the first alternative is anchored at the start; anchoring both (as the
// previous pattern did) meant "/index.html" could never match, because a
// pathname always begins with "/" rather than ".".
const extensions = /^[^.]+$|\.(?:asp|aspx|cgi|htm|html|jsp|php)$/;
12
|
|
|
|
13
|
|
|
/**
 * Crawls a website with a headless browser and feeds every visited page to
 * Wappalyzer, collecting the detected applications.
 *
 * Typical use: `new Driver(pageUrl, options).analyze()`, which resolves with
 * `{ urls, applications, meta }`.
 */
class Driver {
  /**
   * @param {string} pageUrl - URL of the first page to analyze.
   * @param {Object} [options] - Overrides for the defaults listed below.
   */
  constructor(pageUrl, options) {
    this.options = Object.assign({}, {
      password: '',
      proxy: null,
      username: '',
      chunkSize: 5,
      debug: false,
      delay: 500,
      htmlMaxCols: 2000,
      htmlMaxRows: 3000,
      maxDepth: 3,
      maxUrls: 10,
      maxWait: 5000,
      recursive: false,
      userAgent: 'Mozilla/5.0 (compatible; Wappalyzer)',
    }, options || {});

    // Options may arrive as strings; normalise them to booleans / integers.
    this.options.debug = Boolean(+this.options.debug);
    this.options.recursive = Boolean(+this.options.recursive);
    // The delay only staggers requests while crawling recursively.
    this.options.delay = this.options.recursive ? parseInt(this.options.delay, 10) : 0;
    this.options.maxDepth = parseInt(this.options.maxDepth, 10);
    this.options.maxUrls = parseInt(this.options.maxUrls, 10);
    this.options.maxWait = parseInt(this.options.maxWait, 10);
    this.options.htmlMaxCols = parseInt(this.options.htmlMaxCols, 10);
    this.options.htmlMaxRows = parseInt(this.options.htmlMaxRows, 10);

    this.origPageUrl = url.parse(pageUrl);
    this.analyzedPageUrls = [];
    this.apps = [];
    this.meta = {};

    // Initialised here so timer() also works when crawl() is called without
    // going through analyze(); analyze() resets it.
    const now = new Date().getTime();

    this.time = {
      start: now,
      last: now,
    };

    this.wappalyzer = new Wappalyzer();

    this.wappalyzer.apps = json.apps;
    this.wappalyzer.categories = json.categories;

    this.wappalyzer.parseJsPatterns();

    // Route Wappalyzer's driver hooks to this instance.
    this.wappalyzer.driver.log = (message, source, type) => this.log(message, source, type);
    this.wappalyzer.driver.displayApps = (detected, meta, context) => this.displayApps(detected, meta, context);

    // NOTE(review): this adds one process-wide listener per Driver instance and
    // turns every uncaught exception into a log line; kept as-is because
    // callers may rely on the process staying alive.
    process.on('uncaughtException', e => this.wappalyzer.log('Uncaught exception: ' + e.message, 'driver', 'error'));
  }

  /**
   * Starts the crawl at the URL passed to the constructor.
   *
   * @returns {Promise<{urls: string[], applications: Object[], meta: Object}>}
   */
  analyze() {
    this.time = {
      start: new Date().getTime(),
      last: new Date().getTime(),
    };

    return this.crawl(this.origPageUrl);
  }

  /**
   * Prints a message to the console when the `debug` option is enabled.
   *
   * @param {*} message
   * @param {string} source
   * @param {string} type
   */
  log(message, source, type) {
    if ( this.options.debug ) {
      console.log(`[wappalyzer ${type}]`, `[${source}]`, message);
    }
  }

  /**
   * Stores the latest meta data and records every detected application that
   * has not been recorded yet (applications are de-duplicated by name).
   *
   * @param {Object} detected - Detected applications keyed by name.
   * @param {Object} meta
   */
  displayApps(detected, meta) {
    this.meta = meta;

    Object.keys(detected).forEach(appName => {
      const app = detected[appName];

      if ( this.apps.some(detectedApp => detectedApp.name === app.name) ) {
        return;
      }

      // One single-key object per category: { <id>: <category name> }
      const categories = app.props.cats.map(id => ({ [id]: json.categories[id].name }));

      this.apps.push({
        name: app.name,
        confidence: app.confidenceTotal.toString(),
        version: app.version,
        icon: app.props.icon || 'default.svg',
        website: app.props.website,
        categories,
      });
    });
  }

  /**
   * Visits a page unless it is a duplicate or the URL budget is used up.
   *
   * @param {Object} pageUrl - Parsed URL (only `href` is read here).
   * @param {number} index - Position within the current chunk; scales the delay.
   * @param {number} depth - Crawl depth, used for logging only.
   * @returns {Promise<Object[]|undefined>} Links found on the page, or
   *   undefined when the page was skipped or could not be analyzed.
   */
  fetch(pageUrl, index, depth) {
    // Return when the URL is a duplicate or maxUrls has been reached
    if ( this.analyzedPageUrls.indexOf(pageUrl.href) !== -1 || this.analyzedPageUrls.length >= this.options.maxUrls ) {
      return Promise.resolve();
    }

    this.analyzedPageUrls.push(pageUrl.href);

    const timerScope = {
      last: new Date().getTime(),
    };

    const delay = this.options.delay * index;

    this.timer(`fetch; url: ${pageUrl.href}; depth: ${depth}; delay: ${delay}ms`, timerScope);

    // visit() settles the promise through the `resolve` callback; a
    // synchronous failure inside visit() is forwarded as a rejection so the
    // caller is never left waiting forever.
    return new Promise((resolve, reject) => {
      this.sleep(delay)
        .then(() => this.visit(pageUrl, timerScope, resolve))
        .catch(reject);
    });
  }

  /**
   * Loads the page in a fresh browser, runs Wappalyzer on it and calls
   * `resolve` with the crawlable links found (or with nothing when the page is
   * rejected or analysis fails).
   *
   * @param {Object} pageUrl - Parsed URL.
   * @param {{last: number}} timerScope - Timing scope created by fetch().
   * @param {Function} resolve - Settles the promise returned by fetch().
   */
  visit(pageUrl, timerScope, resolve) {
    const browser = new Browser({
      proxy: this.options.proxy,
      silent: true,
      strictSSL: false,
      userAgent: this.options.userAgent,
      waitDuration: this.options.maxWait,
    });

    browser.on('authenticate', auth => {
      auth.username = this.options.username;
      auth.password = this.options.password;
    });

    this.timer(`browser.visit start; url: ${pageUrl.href}`, timerScope);

    browser.visit(pageUrl.href, () => {
      this.timer(`browser.visit end; url: ${pageUrl.href}`, timerScope);

      // Whatever goes wrong from here on, `resolve` must still be called,
      // otherwise the whole crawl stalls on this page.
      const giveUp = error => {
        this.logError(error, pageUrl);

        resolve();
      };

      try {
        if ( !this.responseOk(browser, pageUrl) ) {
          return resolve();
        }

        const page = {
          headers: this.getHeaders(browser),
          html: this.getHtml(browser),
          scripts: this.getScripts(browser),
          js: this.getJs(browser),
          cookies: this.getCookies(browser),
        };

        Promise.resolve(this.wappalyzer.analyze(pageUrl, page))
          .then(() => resolve(this.getLinks(browser)))
          .catch(giveUp);
      } catch ( error ) {
        giveUp(error);
      }
    });
  }

  /**
   * Collects the links on the current page that are worth crawling: http(s)
   * links on the original hostname whose path passes the `extensions` filter.
   * The fragment of each accepted link is cleared before it is parsed.
   *
   * @param {Object} browser
   * @returns {Object[]} Parsed URLs.
   */
  getLinks(browser) {
    const anchors = browser.document.getElementsByTagName('a');

    return Array.prototype.reduce.call(anchors, (results, link) => {
      const crawlable = /https?:/.test(link.protocol)
        && link.hostname === this.origPageUrl.hostname
        && extensions.test(link.pathname);

      if ( crawlable ) {
        link.hash = '';

        results.push(url.parse(link.href));
      }

      return results;
    }, []);
  }

  /**
   * Logs an error together with the URL it relates to.
   *
   * @param {*} error
   * @param {Object} pageUrl - Parsed URL.
   */
  logError(error, pageUrl) {
    const reason = error && error.message ? error.message : String(error);

    this.wappalyzer.log(`${reason}; url: ${pageUrl.href}`, 'driver', 'error');
  }

  /**
   * Returns the first browser resource that carries a response, or a falsy
   * value when there is none.
   *
   * @param {Object} browser
   * @returns {Object|null|undefined}
   */
  getResource(browser) {
    return browser.resources.length ? browser.resources.filter(resource => resource.response).shift() : null;
  }

  /**
   * Checks that the page answered with status 200, an HTML content type and a
   * usable document. A page skipped because of its content type is removed
   * from the analyzed URL list.
   *
   * @param {Object} browser
   * @param {Object} pageUrl - Parsed URL.
   * @returns {boolean}
   */
  responseOk(browser, pageUrl) {
    // Validate response
    const resource = this.getResource(browser);

    if ( !resource ) {
      this.wappalyzer.log(`No response from server; url: ${pageUrl.href}`, 'driver', 'error');

      return false;
    }

    const response = resource.response;

    if ( response.status !== 200 ) {
      this.wappalyzer.log(`Response was not OK; status: ${response.status} ${response.statusText}; url: ${pageUrl.href}`, 'driver', 'error');

      return false;
    }

    // Validate content type
    const headers = this.getHeaders(browser);
    const contentType = Object.prototype.hasOwnProperty.call(headers, 'content-type') ? headers['content-type'][0] : null;

    if ( !contentType || !/\btext\/html\b/.test(contentType) ) {
      this.wappalyzer.log(`Skipping; url: ${pageUrl.href}; content type: ${contentType}`, 'driver');

      // Guard against -1: splice(-1, 1) would drop the last, unrelated URL.
      const position = this.analyzedPageUrls.indexOf(pageUrl.href);

      if ( position !== -1 ) {
        this.analyzedPageUrls.splice(position, 1);
      }

      return false;
    }

    // Validate document
    if ( !browser.document || !browser.document.documentElement ) {
      this.wappalyzer.log(`No HTML document; url: ${pageUrl.href}`, 'driver', 'error');

      return false;
    }

    return true;
  }

  /**
   * Groups the response headers of the first answered resource by name.
   *
   * @param {Object} browser
   * @returns {Object<string, string[]>} Header values keyed by header name.
   */
  getHeaders(browser) {
    const headers = {};
    const resource = this.getResource(browser);

    if ( resource ) {
      resource.response.headers._headers.forEach(header => {
        const name = header[0];

        // Own-property check: a header called e.g. "constructor" must not be
        // mistaken for the member inherited from Object.prototype.
        if ( !Object.prototype.hasOwnProperty.call(headers, name) ) {
          headers[name] = [];
        }

        headers[name].push(header[1]);
      });
    }

    return headers;
  }

  /**
   * Returns the page HTML, limited to `htmlMaxRows` lines of at most
   * `htmlMaxCols` characters. When the document is longer, the first and the
   * last half of the allowed rows are kept and the middle is dropped.
   *
   * @param {Object} browser
   * @returns {string} The (possibly truncated) HTML, or '' on failure.
   */
  getHtml(browser) {
    let html = '';

    try {
      const maxRows = this.options.htmlMaxRows;
      const maxCols = this.options.htmlMaxCols;
      const lines = browser.html().split('\n');

      let kept = lines;

      if ( lines.length > maxRows ) {
        const head = Math.ceil(maxRows / 2);
        const tail = maxRows - head;

        kept = lines.slice(0, head).concat(lines.slice(lines.length - tail));
      }

      html = kept
        .map(line => line.substring(0, maxCols))
        .join('\n');
    } catch ( error ) {
      this.wappalyzer.log(error.message, 'browser', 'error');
    }

    return html;
  }

  /**
   * Lists the `src` of every script element that has one.
   *
   * @param {Object} browser
   * @returns {string[]}
   */
  getScripts(browser) {
    if ( !browser.document || !browser.document.scripts ) {
      return [];
    }

    return Array.from(browser.document.scripts)
      .map(script => script.src)
      .filter(src => src);
  }

  /**
   * Looks up every property chain from Wappalyzer's JS patterns on the page's
   * window object. Strings and numbers are reported as-is, anything else is
   * reduced to a boolean; falsy results are left out.
   *
   * @param {Object} browser
   * @returns {Object} Values keyed by app name, property chain and pattern index.
   */
  getJs(browser) {
    const patterns = this.wappalyzer.jsPatterns;
    const hasOwn = Object.prototype.hasOwnProperty;
    const js = {};

    Object.keys(patterns).forEach(appName => {
      js[appName] = {};

      Object.keys(patterns[appName]).forEach(chain => {
        js[appName][chain] = {};

        // The value depends on the chain only, so resolve it once instead of
        // once per pattern.
        const resolved = chain.split('.').reduce((parent, property) => {
          return parent && hasOwn.call(parent, property) ? parent[property] : null;
        }, browser.window);

        const value = typeof resolved === 'string' || typeof resolved === 'number' ? resolved : !!resolved;

        if ( !value ) {
          return;
        }

        patterns[appName][chain].forEach((pattern, index) => {
          js[appName][chain][index] = value;
        });
      });
    });

    return js;
  }

  /**
   * Copies the browser's cookies into plain objects.
   *
   * @param {Object} browser
   * @returns {Array<{name: *, value: *, domain: *, path: *}>}
   */
  getCookies(browser) {
    const cookies = [];

    if ( browser.cookies ) {
      // forEach + push keeps the result a plain array regardless of what kind
      // of collection `browser.cookies` is.
      browser.cookies.forEach(cookie => cookies.push({
        name: cookie.key,
        value: cookie.value,
        domain: cookie.domain,
        path: cookie.path,
      }));
    }

    return cookies;
  }

  /**
   * Fetches one page and, in recursive mode, the links found on it until
   * `maxDepth` is reached.
   *
   * @param {Object} pageUrl - Parsed URL; a `canonical` property is added to it.
   * @param {number} [index=1] - Position within the current chunk.
   * @param {number} [depth=1] - Current crawl depth.
   * @returns {Promise<{urls: string[], applications: Object[], meta: Object}>}
   */
  crawl(pageUrl, index = 1, depth = 1) {
    pageUrl.canonical = pageUrl.protocol + '//' + pageUrl.host + pageUrl.pathname;

    return this.fetch(pageUrl, index, depth)
      // A page that fails must not abort the crawl, but leave a trace.
      .catch(error => {
        this.logError(error, pageUrl);
      })
      .then(links => {
        if ( links && this.options.recursive && depth < this.options.maxDepth ) {
          return this.chunk(links.slice(0, this.options.maxUrls), depth + 1);
        }
      })
      .then(() => ({
        urls: this.analyzedPageUrls,
        applications: this.apps,
        meta: this.meta,
      }));
  }

  /**
   * Crawls the links in batches of `chunkSize`: the links of one batch are
   * crawled in parallel, the batches one after another. `links` is consumed.
   *
   * @param {Object[]} links - Parsed URLs still to crawl.
   * @param {number} depth - Depth passed on to crawl().
   * @param {number} [chunk=0] - Batch counter.
   * @returns {Promise<undefined>}
   */
  chunk(links, depth, chunk = 0) {
    if ( links.length === 0 ) {
      return Promise.resolve();
    }

    // At least one link per batch, otherwise the recursion would never end.
    const size = Math.max(1, parseInt(this.options.chunkSize, 10) || 1);
    const chunked = links.splice(0, size);

    return Promise.all(chunked.map((link, index) => this.crawl(link, index, depth)))
      .then(() => this.chunk(links, depth, chunk + 1));
  }

  /**
   * @param {number} ms - Time to wait; a falsy value resolves immediately.
   * @returns {Promise<undefined>}
   */
  sleep(ms) {
    return ms ? new Promise(resolve => setTimeout(resolve, ms)) : Promise.resolve();
  }

  /**
   * Logs the time elapsed since the previous timer() call on `scope` and
   * since the start of the analysis, then updates `scope.last`.
   *
   * @param {string} message
   * @param {{last: number}} scope
   */
  timer(message, scope) {
    const time = new Date().getTime();
    const seconds = since => `${Math.round(( time - since ) / 10) / 100}s`;

    const sinceStart = seconds(this.time.start);
    const sinceLast = seconds(scope.last);

    this.wappalyzer.log(`[timer] ${message}; lapsed: ${sinceLast} / ${sinceStart}`, 'driver');

    scope.last = time;
  }
}
352
|
|
|
|
353
|
|
|
// The module's only export: the Driver class itself
module.exports = Driver;
354
|
|
|
|